import pickle

import joblib
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
class train_model():
    """Train a multinomial naive Bayes classifier on token-count text features.

    The training data is read from a CSV file that must provide a
    ``contents`` column (the raw text) and a ``labels`` column (the targets).

    Typical flow: construct the object, call :meth:`extract_email_feature`,
    pass its result to :meth:`train`, then call :meth:`save_model`.
    """

    def __init__(self, file_path):
        """Load the training CSV and create an unfitted classifier.

        Args:
            file_path: Location of the training CSV; forwarded unchanged to
                ``pandas.read_csv``.
        """
        self.data = pd.read_csv(file_path)
        self.model = MultinomialNB()
        # Set by extract_email_feature(); None until features are extracted.
        self.vectorizer = None

    def extract_email_feature(self, vectorizer_path='vectorizer.pkl',
                              max_features=10000):
        """Vectorize the ``contents`` column into token-count features.

        A new ``CountVectorizer`` is fitted on the text, kept on
        ``self.vectorizer`` and dumped with joblib so that the same
        vocabulary can be reloaded later.

        Args:
            vectorizer_path: Destination of the dumped vectorizer. Defaults
                to ``'vectorizer.pkl'`` (relative to the working directory).
            max_features: Value passed to ``CountVectorizer(max_features=...)``.
                Defaults to ``10000``.

        Returns:
            A ``(features, labels)`` tuple: the matrix returned by
            ``fit_transform`` and the ``labels`` column of the training data.
        """
        vectorizer = CountVectorizer(max_features=max_features)
        features = vectorizer.fit_transform(self.data['contents'])
        joblib.dump(vectorizer, vectorizer_path)
        self.vectorizer = vectorizer
        return features, self.data['labels']

    def train(self, x, y):
        """Fit ``self.model`` on the given features and labels.

        Args:
            x: Feature matrix, e.g. the first item returned by
                :meth:`extract_email_feature`.
            y: Target labels aligned with the rows of ``x``.
        """
        self.model.fit(x, y)

    def save_model(self, model_path='aaa.pth'):
        """Dump ``self.model`` to disk with joblib.

        Args:
            model_path: Destination file. Defaults to ``'aaa.pth'``, the
                name this class has always used; regardless of the
                extension, the file is a joblib dump.
        """
        joblib.dump(self.model, model_path)

# Example usage:
# a = train_model('./data/03训练集.csv')  # CSV holding the training set
# x, y = a.extract_email_feature()
# a.train(x, y)
# a.save_model()




